{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "Diabetes Prediction",
      "provenance": [],
      "collapsed_sections": [],
      "authorship_tag": "ABX9TyMQ9DV7RXtf2xYSTKBCcIEA",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/SarraChebaane/Machine-Learning-Projects-for-Beginners/blob/main/Diabetes_Prediction.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "Importing the Dependencies"
      ],
      "metadata": {
        "id": "oX-wzotZRlBp"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import numpy as np\n",
        "import pandas as pd\n",
        "from sklearn.preprocessing import StandardScaler\n",
        "from sklearn.model_selection import train_test_split\n",
        "from sklearn import svm\n",
        "from sklearn.metrics import accuracy_score"
      ],
      "metadata": {
        "id": "TPd6GS67RrnY"
      },
      "execution_count": 1,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "Data Collection and Analysis"
      ],
      "metadata": {
        "id": "I16NylcNSRL8"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# loading the diabetes dataset to a pandas DataFrame\n",
        "diabetes_dataset = pd.read_csv('/content/diabetes.csv')"
      ],
      "metadata": {
        "id": "KloKVBFJSpHA"
      },
      "execution_count": 4,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# describtion of the function (help)\n",
        "pd.read_csv?"
      ],
      "metadata": {
        "id": "qldO-_xPTELl"
      },
      "execution_count": 6,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# printing the first 5 rows of the dataset \n",
        "diabetes_dataset.head()"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 206
        },
        "id": "W9YlzKTMTYlt",
        "outputId": "8f2e264b-30c6-4b52-8f11-25bd66d9b912"
      },
      "execution_count": 7,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "   Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \\\n",
              "0            6      148             72             35        0  33.6   \n",
              "1            1       85             66             29        0  26.6   \n",
              "2            8      183             64              0        0  23.3   \n",
              "3            1       89             66             23       94  28.1   \n",
              "4            0      137             40             35      168  43.1   \n",
              "\n",
              "   DiabetesPedigreeFunction  Age  Outcome  \n",
              "0                     0.627   50        1  \n",
              "1                     0.351   31        0  \n",
              "2                     0.672   32        1  \n",
              "3                     0.167   21        0  \n",
              "4                     2.288   33        1  "
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-8d9b3c5f-18d3-476b-89dd-6668a18a8239\">\n",
              "    <div class=\"colab-df-container\">\n",
              "      <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>Pregnancies</th>\n",
              "      <th>Glucose</th>\n",
              "      <th>BloodPressure</th>\n",
              "      <th>SkinThickness</th>\n",
              "      <th>Insulin</th>\n",
              "      <th>BMI</th>\n",
              "      <th>DiabetesPedigreeFunction</th>\n",
              "      <th>Age</th>\n",
              "      <th>Outcome</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>6</td>\n",
              "      <td>148</td>\n",
              "      <td>72</td>\n",
              "      <td>35</td>\n",
              "      <td>0</td>\n",
              "      <td>33.6</td>\n",
              "      <td>0.627</td>\n",
              "      <td>50</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>1</td>\n",
              "      <td>85</td>\n",
              "      <td>66</td>\n",
              "      <td>29</td>\n",
              "      <td>0</td>\n",
              "      <td>26.6</td>\n",
              "      <td>0.351</td>\n",
              "      <td>31</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>8</td>\n",
              "      <td>183</td>\n",
              "      <td>64</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>23.3</td>\n",
              "      <td>0.672</td>\n",
              "      <td>32</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>1</td>\n",
              "      <td>89</td>\n",
              "      <td>66</td>\n",
              "      <td>23</td>\n",
              "      <td>94</td>\n",
              "      <td>28.1</td>\n",
              "      <td>0.167</td>\n",
              "      <td>21</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>0</td>\n",
              "      <td>137</td>\n",
              "      <td>40</td>\n",
              "      <td>35</td>\n",
              "      <td>168</td>\n",
              "      <td>43.1</td>\n",
              "      <td>2.288</td>\n",
              "      <td>33</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-8d9b3c5f-18d3-476b-89dd-6668a18a8239')\"\n",
              "              title=\"Convert this dataframe to an interactive table.\"\n",
              "              style=\"display:none;\">\n",
              "        \n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "       width=\"24px\">\n",
              "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
              "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
              "  </svg>\n",
              "      </button>\n",
              "      \n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      flex-wrap:wrap;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "      <script>\n",
              "        const buttonEl =\n",
              "          document.querySelector('#df-8d9b3c5f-18d3-476b-89dd-6668a18a8239 button.colab-df-convert');\n",
              "        buttonEl.style.display =\n",
              "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "        async function convertToInteractive(key) {\n",
              "          const element = document.querySelector('#df-8d9b3c5f-18d3-476b-89dd-6668a18a8239');\n",
              "          const dataTable =\n",
              "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                     [key], {});\n",
              "          if (!dataTable) return;\n",
              "\n",
              "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "            + ' to learn more about interactive tables.';\n",
              "          element.innerHTML = '';\n",
              "          dataTable['output_type'] = 'display_data';\n",
              "          await google.colab.output.renderOutput(dataTable, element);\n",
              "          const docLink = document.createElement('div');\n",
              "          docLink.innerHTML = docLinkHtml;\n",
              "          element.appendChild(docLink);\n",
              "        }\n",
              "      </script>\n",
              "    </div>\n",
              "  </div>\n",
              "  "
            ]
          },
          "metadata": {},
          "execution_count": 7
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# number of rows ans columns in this dataset\n",
        "diabetes_dataset.shape"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "wd90YwrMTs9N",
        "outputId": "447fd3aa-f0c0-400b-9ac9-30d23faec7b3"
      },
      "execution_count": 8,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "(768, 9)"
            ]
          },
          "metadata": {},
          "execution_count": 8
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# getting the statistical measures of the data \n",
        "diabetes_dataset.describe()"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 300
        },
        "id": "aX0aBYi3ULf5",
        "outputId": "0c25806a-8291-42ef-d3b4-be184e7bdf03"
      },
      "execution_count": 9,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "       Pregnancies     Glucose  BloodPressure  SkinThickness     Insulin  \\\n",
              "count   768.000000  768.000000     768.000000     768.000000  768.000000   \n",
              "mean      3.845052  120.894531      69.105469      20.536458   79.799479   \n",
              "std       3.369578   31.972618      19.355807      15.952218  115.244002   \n",
              "min       0.000000    0.000000       0.000000       0.000000    0.000000   \n",
              "25%       1.000000   99.000000      62.000000       0.000000    0.000000   \n",
              "50%       3.000000  117.000000      72.000000      23.000000   30.500000   \n",
              "75%       6.000000  140.250000      80.000000      32.000000  127.250000   \n",
              "max      17.000000  199.000000     122.000000      99.000000  846.000000   \n",
              "\n",
              "              BMI  DiabetesPedigreeFunction         Age     Outcome  \n",
              "count  768.000000                768.000000  768.000000  768.000000  \n",
              "mean    31.992578                  0.471876   33.240885    0.348958  \n",
              "std      7.884160                  0.331329   11.760232    0.476951  \n",
              "min      0.000000                  0.078000   21.000000    0.000000  \n",
              "25%     27.300000                  0.243750   24.000000    0.000000  \n",
              "50%     32.000000                  0.372500   29.000000    0.000000  \n",
              "75%     36.600000                  0.626250   41.000000    1.000000  \n",
              "max     67.100000                  2.420000   81.000000    1.000000  "
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-2215e263-47ee-47ad-a43f-45fbd782d729\">\n",
              "    <div class=\"colab-df-container\">\n",
              "      <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>Pregnancies</th>\n",
              "      <th>Glucose</th>\n",
              "      <th>BloodPressure</th>\n",
              "      <th>SkinThickness</th>\n",
              "      <th>Insulin</th>\n",
              "      <th>BMI</th>\n",
              "      <th>DiabetesPedigreeFunction</th>\n",
              "      <th>Age</th>\n",
              "      <th>Outcome</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>count</th>\n",
              "      <td>768.000000</td>\n",
              "      <td>768.000000</td>\n",
              "      <td>768.000000</td>\n",
              "      <td>768.000000</td>\n",
              "      <td>768.000000</td>\n",
              "      <td>768.000000</td>\n",
              "      <td>768.000000</td>\n",
              "      <td>768.000000</td>\n",
              "      <td>768.000000</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>mean</th>\n",
              "      <td>3.845052</td>\n",
              "      <td>120.894531</td>\n",
              "      <td>69.105469</td>\n",
              "      <td>20.536458</td>\n",
              "      <td>79.799479</td>\n",
              "      <td>31.992578</td>\n",
              "      <td>0.471876</td>\n",
              "      <td>33.240885</td>\n",
              "      <td>0.348958</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>std</th>\n",
              "      <td>3.369578</td>\n",
              "      <td>31.972618</td>\n",
              "      <td>19.355807</td>\n",
              "      <td>15.952218</td>\n",
              "      <td>115.244002</td>\n",
              "      <td>7.884160</td>\n",
              "      <td>0.331329</td>\n",
              "      <td>11.760232</td>\n",
              "      <td>0.476951</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>min</th>\n",
              "      <td>0.000000</td>\n",
              "      <td>0.000000</td>\n",
              "      <td>0.000000</td>\n",
              "      <td>0.000000</td>\n",
              "      <td>0.000000</td>\n",
              "      <td>0.000000</td>\n",
              "      <td>0.078000</td>\n",
              "      <td>21.000000</td>\n",
              "      <td>0.000000</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>25%</th>\n",
              "      <td>1.000000</td>\n",
              "      <td>99.000000</td>\n",
              "      <td>62.000000</td>\n",
              "      <td>0.000000</td>\n",
              "      <td>0.000000</td>\n",
              "      <td>27.300000</td>\n",
              "      <td>0.243750</td>\n",
              "      <td>24.000000</td>\n",
              "      <td>0.000000</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>50%</th>\n",
              "      <td>3.000000</td>\n",
              "      <td>117.000000</td>\n",
              "      <td>72.000000</td>\n",
              "      <td>23.000000</td>\n",
              "      <td>30.500000</td>\n",
              "      <td>32.000000</td>\n",
              "      <td>0.372500</td>\n",
              "      <td>29.000000</td>\n",
              "      <td>0.000000</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>75%</th>\n",
              "      <td>6.000000</td>\n",
              "      <td>140.250000</td>\n",
              "      <td>80.000000</td>\n",
              "      <td>32.000000</td>\n",
              "      <td>127.250000</td>\n",
              "      <td>36.600000</td>\n",
              "      <td>0.626250</td>\n",
              "      <td>41.000000</td>\n",
              "      <td>1.000000</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>max</th>\n",
              "      <td>17.000000</td>\n",
              "      <td>199.000000</td>\n",
              "      <td>122.000000</td>\n",
              "      <td>99.000000</td>\n",
              "      <td>846.000000</td>\n",
              "      <td>67.100000</td>\n",
              "      <td>2.420000</td>\n",
              "      <td>81.000000</td>\n",
              "      <td>1.000000</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-2215e263-47ee-47ad-a43f-45fbd782d729')\"\n",
              "              title=\"Convert this dataframe to an interactive table.\"\n",
              "              style=\"display:none;\">\n",
              "        \n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "       width=\"24px\">\n",
              "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
              "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
              "  </svg>\n",
              "      </button>\n",
              "      \n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      flex-wrap:wrap;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "      <script>\n",
              "        const buttonEl =\n",
              "          document.querySelector('#df-2215e263-47ee-47ad-a43f-45fbd782d729 button.colab-df-convert');\n",
              "        buttonEl.style.display =\n",
              "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "        async function convertToInteractive(key) {\n",
              "          const element = document.querySelector('#df-2215e263-47ee-47ad-a43f-45fbd782d729');\n",
              "          const dataTable =\n",
              "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                     [key], {});\n",
              "          if (!dataTable) return;\n",
              "\n",
              "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "            + ' to learn more about interactive tables.';\n",
              "          element.innerHTML = '';\n",
              "          dataTable['output_type'] = 'display_data';\n",
              "          await google.colab.output.renderOutput(dataTable, element);\n",
              "          const docLink = document.createElement('div');\n",
              "          docLink.innerHTML = docLinkHtml;\n",
              "          element.appendChild(docLink);\n",
              "        }\n",
              "      </script>\n",
              "    </div>\n",
              "  </div>\n",
              "  "
            ]
          },
          "metadata": {},
          "execution_count": 9
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "diabetes_dataset['Outcome'].value_counts()"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "7Yuj__a8UdI7",
        "outputId": "2cb2680f-fb55-4abe-ae52-f371ff3f282e"
      },
      "execution_count": 11,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "0    500\n",
              "1    268\n",
              "Name: Outcome, dtype: int64"
            ]
          },
          "metadata": {},
          "execution_count": 11
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "0 --> Non-Diabetic \n",
        "1 --> Diabetic "
      ],
      "metadata": {
        "id": "6bZ7rrfjU1OR"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "diabetes_dataset.groupby('Outcome').mean()"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 143
        },
        "id": "dDUg_1gHU9By",
        "outputId": "f5b24949-4685-4ba0-c7cf-793c8f6d227d"
      },
      "execution_count": 13,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "         Pregnancies     Glucose  BloodPressure  SkinThickness     Insulin  \\\n",
              "Outcome                                                                      \n",
              "0           3.298000  109.980000      68.184000      19.664000   68.792000   \n",
              "1           4.865672  141.257463      70.824627      22.164179  100.335821   \n",
              "\n",
              "               BMI  DiabetesPedigreeFunction        Age  \n",
              "Outcome                                                  \n",
              "0        30.304200                  0.429734  31.190000  \n",
              "1        35.142537                  0.550500  37.067164  "
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-8148cee8-058b-457f-a85f-29b384f86883\">\n",
              "    <div class=\"colab-df-container\">\n",
              "      <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>Pregnancies</th>\n",
              "      <th>Glucose</th>\n",
              "      <th>BloodPressure</th>\n",
              "      <th>SkinThickness</th>\n",
              "      <th>Insulin</th>\n",
              "      <th>BMI</th>\n",
              "      <th>DiabetesPedigreeFunction</th>\n",
              "      <th>Age</th>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>Outcome</th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>3.298000</td>\n",
              "      <td>109.980000</td>\n",
              "      <td>68.184000</td>\n",
              "      <td>19.664000</td>\n",
              "      <td>68.792000</td>\n",
              "      <td>30.304200</td>\n",
              "      <td>0.429734</td>\n",
              "      <td>31.190000</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>4.865672</td>\n",
              "      <td>141.257463</td>\n",
              "      <td>70.824627</td>\n",
              "      <td>22.164179</td>\n",
              "      <td>100.335821</td>\n",
              "      <td>35.142537</td>\n",
              "      <td>0.550500</td>\n",
              "      <td>37.067164</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-8148cee8-058b-457f-a85f-29b384f86883')\"\n",
              "              title=\"Convert this dataframe to an interactive table.\"\n",
              "              style=\"display:none;\">\n",
              "        \n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "       width=\"24px\">\n",
              "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
              "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
              "  </svg>\n",
              "      </button>\n",
              "      \n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      flex-wrap:wrap;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "      <script>\n",
              "        const buttonEl =\n",
              "          document.querySelector('#df-8148cee8-058b-457f-a85f-29b384f86883 button.colab-df-convert');\n",
              "        buttonEl.style.display =\n",
              "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "        async function convertToInteractive(key) {\n",
              "          const element = document.querySelector('#df-8148cee8-058b-457f-a85f-29b384f86883');\n",
              "          const dataTable =\n",
              "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                     [key], {});\n",
              "          if (!dataTable) return;\n",
              "\n",
              "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "            + ' to learn more about interactive tables.';\n",
              "          element.innerHTML = '';\n",
              "          dataTable['output_type'] = 'display_data';\n",
              "          await google.colab.output.renderOutput(dataTable, element);\n",
              "          const docLink = document.createElement('div');\n",
              "          docLink.innerHTML = docLinkHtml;\n",
              "          element.appendChild(docLink);\n",
              "        }\n",
              "      </script>\n",
              "    </div>\n",
              "  </div>\n",
              "  "
            ]
          },
          "metadata": {},
          "execution_count": 13
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# separating the data and labels\n",
        "X = diabetes_dataset.drop(columns = 'Outcome' , axis = 1)\n",
        "Y = diabetes_dataset['Outcome']"
      ],
      "metadata": {
        "id": "y3NMWgjTVP3E"
      },
      "execution_count": 14,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "print(X)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "tST91BCxVwoA",
        "outputId": "ac5cfd1a-c358-4525-9c70-cf87635ac556"
      },
      "execution_count": 15,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "     Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \\\n",
            "0              6      148             72             35        0  33.6   \n",
            "1              1       85             66             29        0  26.6   \n",
            "2              8      183             64              0        0  23.3   \n",
            "3              1       89             66             23       94  28.1   \n",
            "4              0      137             40             35      168  43.1   \n",
            "..           ...      ...            ...            ...      ...   ...   \n",
            "763           10      101             76             48      180  32.9   \n",
            "764            2      122             70             27        0  36.8   \n",
            "765            5      121             72             23      112  26.2   \n",
            "766            1      126             60              0        0  30.1   \n",
            "767            1       93             70             31        0  30.4   \n",
            "\n",
            "     DiabetesPedigreeFunction  Age  \n",
            "0                       0.627   50  \n",
            "1                       0.351   31  \n",
            "2                       0.672   32  \n",
            "3                       0.167   21  \n",
            "4                       2.288   33  \n",
            "..                        ...  ...  \n",
            "763                     0.171   63  \n",
            "764                     0.340   27  \n",
            "765                     0.245   30  \n",
            "766                     0.349   47  \n",
            "767                     0.315   23  \n",
            "\n",
            "[768 rows x 8 columns]\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "print(Y)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "7G80VN3UV2KT",
        "outputId": "6574491d-65c3-4bf9-e757-4cd408604f1e"
      },
      "execution_count": 16,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "0      1\n",
            "1      0\n",
            "2      1\n",
            "3      0\n",
            "4      1\n",
            "      ..\n",
            "763    0\n",
            "764    0\n",
            "765    0\n",
            "766    1\n",
            "767    0\n",
            "Name: Outcome, Length: 768, dtype: int64\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "Data Standarization "
      ],
      "metadata": {
        "id": "GhseJTz6V7HX"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "scaler = StandardScaler()"
      ],
      "metadata": {
        "id": "Lo84liPYWGFD"
      },
      "execution_count": 19,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "scaler.fit(X)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "tRLV2VHAWPhg",
        "outputId": "0de53693-0a1a-4090-bac8-b007560277bf"
      },
      "execution_count": 20,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "StandardScaler()"
            ]
          },
          "metadata": {},
          "execution_count": 20
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "standardized_data = scaler.transform(X)   "
      ],
      "metadata": {
        "id": "Jh7WJiGZWTOn"
      },
      "execution_count": 21,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "print(standardized_data)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "RURhAPaQWmZE",
        "outputId": "a3128a9e-860a-482a-c124-64dc556f93b0"
      },
      "execution_count": 22,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[[ 0.63994726  0.84832379  0.14964075 ...  0.20401277  0.46849198\n",
            "   1.4259954 ]\n",
            " [-0.84488505 -1.12339636 -0.16054575 ... -0.68442195 -0.36506078\n",
            "  -0.19067191]\n",
            " [ 1.23388019  1.94372388 -0.26394125 ... -1.10325546  0.60439732\n",
            "  -0.10558415]\n",
            " ...\n",
            " [ 0.3429808   0.00330087  0.14964075 ... -0.73518964 -0.68519336\n",
            "  -0.27575966]\n",
            " [-0.84488505  0.1597866  -0.47073225 ... -0.24020459 -0.37110101\n",
            "   1.17073215]\n",
            " [-0.84488505 -0.8730192   0.04624525 ... -0.20212881 -0.47378505\n",
            "  -0.87137393]]\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "X = standardized_data\n",
        "Y = diabetes_dataset['Outcome']"
      ],
      "metadata": {
        "id": "HvbeVCViWtU1"
      },
      "execution_count": 23,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "print(X)\n",
        "print(Y)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "2rGXeHt1W01t",
        "outputId": "6c09eecb-236c-4ad7-a275-00952c05808c"
      },
      "execution_count": 24,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[[ 0.63994726  0.84832379  0.14964075 ...  0.20401277  0.46849198\n",
            "   1.4259954 ]\n",
            " [-0.84488505 -1.12339636 -0.16054575 ... -0.68442195 -0.36506078\n",
            "  -0.19067191]\n",
            " [ 1.23388019  1.94372388 -0.26394125 ... -1.10325546  0.60439732\n",
            "  -0.10558415]\n",
            " ...\n",
            " [ 0.3429808   0.00330087  0.14964075 ... -0.73518964 -0.68519336\n",
            "  -0.27575966]\n",
            " [-0.84488505  0.1597866  -0.47073225 ... -0.24020459 -0.37110101\n",
            "   1.17073215]\n",
            " [-0.84488505 -0.8730192   0.04624525 ... -0.20212881 -0.47378505\n",
            "  -0.87137393]]\n",
            "0      1\n",
            "1      0\n",
            "2      1\n",
            "3      0\n",
            "4      1\n",
            "      ..\n",
            "763    0\n",
            "764    0\n",
            "765    0\n",
            "766    1\n",
            "767    0\n",
            "Name: Outcome, Length: 768, dtype: int64\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "Train Test Split "
      ],
      "metadata": {
        "id": "30GISf_YXAnJ"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "X_train, X_test, Y_train, Y_test = train_test_split(X,Y,test_size=0.2,stratify=Y,random_state= 2)"
      ],
      "metadata": {
        "id": "sUzEnN8iXCyD"
      },
      "execution_count": 25,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "print(X.shape,X_train.shape,X_test.shape)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "_swV9dmQX2Xg",
        "outputId": "6ad31263-f37b-4944-e110-5adb3227f5d8"
      },
      "execution_count": 27,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "(768, 8) (614, 8) (154, 8)\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "classifier = svm.SVC(kernel='linear')"
      ],
      "metadata": {
        "id": "bVHxBeGFYG5W"
      },
      "execution_count": 28,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# training the support vector Machine Classifier\n",
        "classifier.fit(X_train,Y_train)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "wu6WxI7iYUE8",
        "outputId": "d1910c34-29b9-4465-e6ed-7d87cde8716e"
      },
      "execution_count": 30,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "SVC(kernel='linear')"
            ]
          },
          "metadata": {},
          "execution_count": 30
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "Model Evaluation"
      ],
      "metadata": {
        "id": "lmFK5rURYmsj"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "Accuracy Score"
      ],
      "metadata": {
        "id": "_7ja2fmtYqI0"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# accuracy score on the training data \n",
        "X_train_prediction = classifier.predict(X_train)\n",
        "training_data_accuracy = accuracy_score(X_train_prediction,Y_train)"
      ],
      "metadata": {
        "id": "_olJsLSGYoPv"
      },
      "execution_count": 33,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "print('Accuracy score of the training data',training_data_accuracy)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "FDg_rfkcZUEV",
        "outputId": "2378e904-7bdf-4d98-9a50-becc5e40e577"
      },
      "execution_count": 34,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Accuracy score of the training data 0.7866449511400652\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# accuracy score on the test data \n",
        "X_test_prediction = classifier.predict(X_test)\n",
        "test_data_accuracy = accuracy_score(X_test_prediction,Y_test)"
      ],
      "metadata": {
        "id": "4PkjYcRwZg_I"
      },
      "execution_count": 37,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "print('Accuracy score of the test data',test_data_accuracy)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "q50uuT_QZ5dw",
        "outputId": "ed3a9188-0eee-42ef-d83f-5a68a875861d"
      },
      "execution_count": 38,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Accuracy score of the test data 0.7727272727272727\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "Making a Predictive System"
      ],
      "metadata": {
        "id": "0JQMU2iMbdOP"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "input_data = (8,183,64,0,0,23.3,0.672,32)\n",
        "\n",
        "# changing the input_data to numpy array\n",
        "input_data_as_numpy_array = np.asarray(input_data)\n",
        "\n",
        "# reshape the array as we are predicting for one instance\n",
        "input_data_reshaped = input_data_as_numpy_array.reshape(1,-1)\n",
        "\n",
        "# standarize the input data\n",
        "std_data = scaler.transform(input_data_reshaped)\n",
        "print(std_data)\n",
        "\n",
        "prediction = classifier.predict(std_data)\n",
        "print(prediction)\n",
        "\n",
        "\n",
        "if prediction[0] == 0 :\n",
        "  print('The Person is not diabetic ')\n",
        "else:\n",
        "  print('The person is diabetic')"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "IJZm5giPaC2i",
        "outputId": "d4db3196-721e-4d0f-ac4a-c6329f0fe28d"
      },
      "execution_count": 43,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[[ 1.23388019  1.94372388 -0.26394125 -1.28821221 -0.69289057 -1.10325546\n",
            "   0.60439732 -0.10558415]]\n",
            "[1]\n",
            "The person is diabetic\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "/usr/local/lib/python3.7/dist-packages/sklearn/base.py:451: UserWarning: X does not have valid feature names, but StandardScaler was fitted with feature names\n",
            "  \"X does not have valid feature names, but\"\n"
          ]
        }
      ]
    }
  ]
}